#!/usr/bin/perl
# 
# Exercise 11.9
# 
# Write a recursive subroutine that extracts the primary amino acid sequence from 
# the SEQRES record type of a PDB file.
#
# Answer to Exercise 11.9
#
# Again, this is tricky because of the existence of separate chains.  I'll show you
# a subroutine that works on a PDB file that has only one chain, and challenge you to
# extend it to the more general case of multiple chains.

use strict;
use warnings;
use BeginPerlBioinfo;

# Give the name of your PDB file (the default one has only one chain).
# A different path may be supplied as the first command-line argument.
my $pdbfile = @ARGV ? $ARGV[0] : 'pdb/44/pdb244l.ent';

# Get the file data, parse the record types
my @pdbfiledata = get_file_data($pdbfile);

# Extract all the record types from the PDB file data
my %recordtypes = parsePDBrecordtypes(@pdbfiledata);

# Stop with a clear message if there is no SEQRES entry; otherwise the split
# below would be applied to an undefined value.
defined $recordtypes{'SEQRES'}
    or die "No SEQRES records found in $pdbfile\n";

# Extract the sequence, handing the SEQRES data over one line per element
my $sequence = extractSEQRESrecursive(split /\n/, $recordtypes{'SEQRES'});

print "$sequence\n";

exit;

################################################################################
# Subroutines
################################################################################

# extractSEQRESrecursive
#
#-given an array containing SEQRES lines (one line per element),
#    return the sequence: the concatenation, in order, of what iub3to1
#    returns for the residue columns of each line
#
# Returns the empty string when given no lines.  Lines too short to reach the
# residue columns contribute nothing.  Chain identifiers are not examined, so
# the result is only meaningful for SEQRES data describing a single chain.
#

sub extractSEQRESrecursive {

    # One level of recursion is used per SEQRES line, so input with around a
    # hundred lines or more reaches the depth at which Perl emits its
    # "Deep recursion" warning.  That depth is expected here, so the warning
    # is switched off for this subroutine only.
    no warnings 'recursion';

    my(@seqres) = @_;

    # Base case: no lines left to process
    return '' if not @seqres;

    my $line = shift @seqres;

    # Residues start in column 20 (offset 19) and span at most 52 characters.
    # Skip lines that end before that column: substr would return undef
    # (with a warning) for them.
    my $residues = '';
    if( defined $line and length($line) > 19 ) {
        $residues = iub3to1(substr($line, 19, 52));
    }

    # Append the result of a recursive call on the rest of the lines
    return $residues . extractSEQRESrecursive(@seqres);
}
